Morphological variation of amazon and sailfin mollies {.tabset .tabset-fade .tabset-pills}
In this study, I examine populations of Amazon and sailfin mollies across their native and introduced ranges to assess morphological variation both within and between the species. The specimens come from the Pickle fish collections.
## Using libcurl 7.64.1 with Schannel
I will use Shapiro-Wilk tests, histograms, and Q-Q plots to determine which traits are normally distributed. These checks are only performed on continuous variables, as discrete variables are not normally distributed by nature.
Conclusions: none of the continuous traits are normally distributed (every Shapiro-Wilk test below rejects normality, p < 0.01). I will tentatively log-transform them for the PCA, but will use non-parametric tests for the comparisons (Levene’s test instead of the F-test, Mann-Whitney U instead of the t-test, and the Kruskal-Wallis H test instead of ANOVA).
# Normality screening of the continuous traits in `raw1`.
# Three passes over the same trait list, in the same order as the original
# hand-written calls: Shapiro-Wilk tests, then histograms, then normal Q-Q
# plots. Looping over one vector keeps the three passes in sync when a trait
# is added or removed.
continuous_traits <- c(
  "SL", "BD", "CPD", "CPL", "PreDL", "DbL", "HL", "HD", "HW", "SnL", "OL"
)

# Shapiro-Wilk tests. shapiro.test() labels its output with the deparsed
# argument, so `data.name` is overwritten to keep the printed header reading
# "data:  raw1$<trait>" exactly as when each call was written out.
for (trait_name in continuous_traits) {
  sw_result <- shapiro.test(raw1[[trait_name]])
  sw_result$data.name <- paste0("raw1$", trait_name)
  print(sw_result)
}
# Results recorded from the original run:
#   trait   W         p-value
#   SL      0.9763    2.763e-05
#   BD      0.96287   1.787e-07
#   CPD     0.96431   2.908e-07
#   CPL     0.97288   6.831e-06
#   PreDL   0.97924   9.997e-05
#   DbL     0.97697   3.68e-05
#   HL      0.94955   3.057e-09
#   HD      0.96761   9.28e-07
#   HW      0.97201   4.833e-06
#   SnL     0.65042   < 2.2e-16
#   OL      0.98631   0.003102

# Histograms. `main` and `xlab` are set explicitly to the values hist() would
# have derived from a literal `raw1$<trait>` argument.
for (trait_name in continuous_traits) {
  trait_label <- paste0("raw1$", trait_name)
  hist(
    raw1[[trait_name]],
    main = paste("Histogram of", trait_label),
    xlab = trait_label
  )
}

# Normal Q-Q plots, each with its reference line.
for (trait_name in continuous_traits) {
  qqnorm(raw1[[trait_name]])
  qqline(raw1[[trait_name]])
}
Because Amazons are generally larger than sailfins, we do not want any result to be an artifact of the difference in body size. We will therefore test which traits are influenced by body size (regressions on SL) and correct for body size where necessary (using the absolute value of the residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and the raw data for traits that are not. I will also calculate standardized residuals so that residuals can be compared across traits in later analyses.
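Quick results summary: the traits not influenced by body size are left and right pelvic (P2.L, P2.R), anal (A), the SBDF scale count, and fluctuating asymmetry (FLA); all other traits are influenced by body size. (Scales below the lateral line, SBLL, are among the traits that are size-corrected in step three.)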
library(ggplot2)
library(ggpubr)
##### Size regressions: LAT, FORM, MEX #####
# Every trait is regressed on SL separately within each population data frame
# (`lat`, `form`, `mex`). For each population/trait pair this creates, in the
# current environment, the same objects the hand-written stanzas created:
#   reg.<pop>.<trait>        the fitted lm
#   sd.<pop>.<trait>         standardized residuals (rstandard() of that fit)
#   reg.<pop>.<trait>.plot   scatterplot with fitted line and correlation label
# and prints each plot as soon as it is built (lat first, then form, then mex).

size_traits <- c(
  "D", "P1", "P2.L", "P2.R", "A", "P1.R", "LLSC", "SALL", "SBLL", "SBDF",
  "BD", "CPD", "CPL", "PreDL", "DbL", "HL", "HD", "HW", "SnL", "OL", "FLA"
)
size_populations <- c("lat", "form", "mex")

# Fit `<trait> ~ SL` on the data frame named `pop`.
#
# The model is fitted with `data =` rather than `lm(pop$trait ~ pop$SL)`:
# fitted values and residuals are numerically the same, but predict() with
# `newdata` now works and the slope coefficient is named `SL` instead of
# e.g. `lat$SL`. The call is assembled with bquote() so that the call stored
# in the model reads `lm(formula = D ~ SL, data = lat)` and can be re-run by
# update().
#
# pop:   name of the population data frame, as a string ("lat", "form", "mex")
# trait: name of the response column, as a string
# envir: environment in which the data frame is looked up
# Returns the fitted lm object.
fit_size_regression <- function(pop, trait, envir = parent.frame()) {
  model_call <- bquote(
    lm(.(reformulate("SL", response = trait)), data = .(as.name(pop)))
  )
  eval(model_call, envir)
}

# Scatterplot of `trait` against SL with a red least-squares line and the
# correlation label from ggpubr::stat_cor().
#
# data:    population data frame containing `SL` and the trait column
# trait:   name of the y column, as a string
# label_y: y position of the correlation label; the default of 10 is the
#          value every original plot used.
#          NOTE(review): for traits whose values lie far from 10 this fixed
#          position will stretch the y-axis - consider overriding per trait.
# Returns the ggplot object (not printed).
plot_size_regression <- function(data, trait, label_y = 10) {
  force(trait) # aes() is evaluated lazily; pin the trait for stored plots
  ggplot(data, aes(x = SL, y = .data[[trait]])) +
    geom_point() +
    # explicit formula: same fit, without the "using formula 'y ~ x'" message
    stat_smooth(method = "lm", formula = y ~ x, colour = "red") +
    stat_cor(label.y = label_y) +
    labs(y = trait)
}

for (pop_name in size_populations) {
  pop_data <- get(pop_name)
  for (trait_name in size_traits) {
    size_fit <- fit_size_regression(pop_name, trait_name)
    size_plot <- plot_size_regression(pop_data, trait_name)
    assign(paste("reg", pop_name, trait_name, sep = "."), size_fit)
    assign(paste("sd", pop_name, trait_name, sep = "."), rstandard(size_fit))
    assign(paste("reg", pop_name, trait_name, "plot", sep = "."), size_plot)
    print(size_plot)
  }
}
STEP TWO: get residuals for each individual for traits that were influenced by body size
STEP THREE: convert residuals to absolute value
##### LAT #####
abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SALL <- abs(res.lat.SALL)
mean(abs.lat.SALL)
## [1] 0.3174361
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.7662013
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.3693678
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.463331
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.5692506
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.6943292
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.5195023
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.3736227
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.3532098
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.3559954
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 0.242467
##### FORM #####
abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SALL <- abs(res.form.SALL)
mean(abs.form.SALL)
## [1] 0.3601306
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.6992201
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.3242864
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.4841018
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.5943769
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.5507415
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.7175548
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.3866209
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.3756333
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.2819469
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 0.2013391
##### MEX #####
abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SALL <- abs(res.mex.SALL)
mean(abs.mex.SALL)
## [1] 0.1248197
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.6849863
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.3413966
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.4422817
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 1.233006
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.4028121
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.3375982
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.3266186
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.2523109
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.263338
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 0.1157007
# Combine the per-species absolute residuals into one vector per trait and
# attach them to raw1 as new columns (result: raw2), so they can be plotted
# and tested by species.
#
# Each vector is concatenated in the fixed order lat, form, mex and is then
# bound to raw1 purely by position. That is only correct when raw1's rows are
# grouped in that same species order, so the stopifnot() below checks the row
# count and the species order and fails loudly instead of silently pairing
# residuals with the wrong fish.
abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC <- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SALL <- c(abs.lat.SALL, abs.form.SALL, abs.mex.SALL)
abs.res.SBLL <- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD <- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD <- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL <- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL <- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD <- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)

# Species labels as they appear in raw1$SPP, in concatenation order.
species_order <- c("p.latipinna", "p.formosa", "p.mexicana")
stopifnot(
  length(abs.res.D) == nrow(raw1),
  # match() maps each row to 1/2/3; an unknown label gives NA and also fails.
  !is.unsorted(match(raw1$SPP, species_order))
)

# cbind() names each new column after the variable passed in (abs.res.D, ...).
raw2 <- cbind(
  raw1,
  abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC,
  abs.res.SALL, abs.res.SBLL, abs.res.BD, abs.res.CPD,
  abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL,
  abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL
)
library(ggbeeswarm)
## Warning: package 'ggbeeswarm' was built under R version 4.1.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Per-trait plots of absolute residuals by species (raw2).
# Every scatter plot uses the same three layers, so they are built once and
# added to each plot:
#   - the raw points, semi-transparent;
#   - a red error bar spanning the 68.3% normal-theory confidence interval of
#     the mean (about +/- 1 standard error);
#   - a red filled circle at the group mean.
se_errorbar <- stat_summary(
  fun.data = function(x) {
    mean_cl_normal(x, conf.int = .683)
  },
  geom = "errorbar", width = 0.03, colour = "red", alpha = 0.7
)
mean_marker <- stat_summary(
  fun = mean, geom = "point", fill = "red", pch = 21, size = 3
)
abs_res_layers <- list(geom_point(alpha = 0.3), se_errorbar, mean_marker)

# Dorsal trait (D): scatter plot plus two violin variants.
ggplot(raw2, aes(x = SPP, y = abs.res.D)) + abs_res_layers

violin_base <- ggplot(data = raw2, mapping = aes(x = SPP, y = abs.res.D)) +
  geom_violin(trim = FALSE)

# Variant 1: mean +/- 1 SD drawn as a point range.
scatter_violin <- violin_base +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
  )
print(scatter_violin)

# Variant 2: the same error bar as the scatter plots, with the mean drawn as a
# narrow crossbar.
scatter_violin1 <- violin_base +
  se_errorbar +
  stat_summary(fun = mean, geom = "crossbar", fill = "red", width = 0.03)
print(scatter_violin1)

# Remaining traits: one scatter plot each, in the same order as before.
ggplot(raw2, aes(x = SPP, y = abs.res.P1)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.P1.R)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.LLSC)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.SALL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.SBLL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.BD)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.CPD)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.CPL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.PreDL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.DbL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.HL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.HD)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.HW)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.SnL)) + abs_res_layers
ggplot(raw2, aes(x = SPP, y = abs.res.OL)) + abs_res_layers
(Note: I previously ran F-tests and t-tests, but none of the traits are normally distributed, so I will redo the comparisons using non-parametric tests. I moved the old analysis to ‘morphology-analysis_final’ for the record.)
Levene’s test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, LT on residuals is like variance of the variance. Instead, I have to do a Mann-Whitney U test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a MWU on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an LT of variance on the raw data.
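Quick results summary: For the Levene’s test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the MWU tests on residuals, the only significant traits are left pectoral (lat>form), right pectoral (lat>form), scales above lateral line (form>lat), scales below lateral line (form>lat), and head length (form>lat). Directions are read from the sign of the location-shift estimates reported by the Wilcoxon tests below (negative: lat>form; positive: form>lat).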
- I will run two-tailed tests and check the residual means to infer direction; for traits in which we use raw data, a one-tailed F-test will be performed in both directions to determine which species varies more. We will also visualize the variation using a histogram to confirm the direction of the results.
Will create a dataset of just amazons and sailfins, as this is my main comparison. I will do a test with the mexicana and the sailfins/amazons from Tampico later.
general formula: leveneTest(dependent~independent, dataframe) For me, I am interested in if the variance of a trait (dependent) differs between two species (independent), so it will be leveneTest(trait~spp, df).
Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SBDF, FLA).
# Keep only the rows whose species is not p.mexicana; raw3 is the data set
# used for the two-species comparisons that follow.
raw3 <- raw2[raw2$SPP !="p.mexicana", ]
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Levene's test (car::leveneTest, median-centred as the output header states)
# for equal variance between the species in raw3, run on raw trait values.
# P2.L and P2.R return NaN because every fish has the same value (see the
# author's notes on the two calls).
leveneTest(P2.L~SPP, data=raw3) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 298
leveneTest(P2.R~SPP, data=raw3) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 NaN NaN
## 298
leveneTest(A~SPP, data=raw3)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3962 0.5295
## 298
leveneTest(SBDF~SPP, data=raw3)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0081 0.9283
## 298
leveneTest(FLA~SPP, data=raw3)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.7164 0.1004
## 298
This will be performed on traits that DID vary with SL.
# Two-sided Wilcoxon rank-sum (Mann-Whitney U) tests comparing the absolute
# residuals of each trait between the two species in raw3. conf.int = TRUE
# additionally reports the estimated location shift and its 95% interval.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): the sign of "difference in location" is first SPP group minus
# second; presumably p.formosa minus p.latipinna (alphabetical order) -- confirm.
wilcox.test(abs.res.D ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.D by SPP
## W = 12186, p-value = 0.1545
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.05689062 0.19840357
## sample estimates:
## difference in location
## 0.1030553
wilcox.test(abs.res.P1 ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.P1 by SPP
## W = 8765, p-value = 0.001606
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.23328614 -0.07245187
## sample estimates:
## difference in location
## -0.1618195
wilcox.test(abs.res.P1.R ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.P1.R by SPP
## W = 7363, p-value = 4.86e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.2596445 -0.1543936
## sample estimates:
## difference in location
## -0.2082393
wilcox.test(abs.res.LLSC ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.LLSC by SPP
## W = 12119, p-value = 0.1822
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.03418245 0.19226705
## sample estimates:
## difference in location
## 0.0763304
wilcox.test(abs.res.SALL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.SALL by SPP
## W = 15582, p-value = 2.369e-09
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.02898618 0.06454412
## sample estimates:
## difference in location
## 0.04617539
wilcox.test(abs.res.SBLL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.SBLL by SPP
## W = 14104, p-value = 6.565e-05
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.02388613 0.07010299
## sample estimates:
## difference in location
## 0.04706298
wilcox.test(abs.res.BD ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.BD by SPP
## W = 10586, p-value = 0.4734
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.15634237 0.06828221
## sample estimates:
## difference in location
## -0.04128428
wilcox.test(abs.res.CPD ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.CPD by SPP
## W = 10435, p-value = 0.3581
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.07905433 0.02543525
## sample estimates:
## difference in location
## -0.02482814
wilcox.test(abs.res.CPL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.CPL by SPP
## W = 11525, p-value = 0.59
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.05285391 0.09013647
## sample estimates:
## difference in location
## 0.01831735
# NOTE(review): this repeats the abs.res.D test from the top of this section
# (same call, same output); kept so the recorded results stay complete.
wilcox.test(abs.res.D ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.D by SPP
## W = 12186, p-value = 0.1545
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.05689062 0.19840357
## sample estimates:
## difference in location
## 0.1030553
wilcox.test(abs.res.PreDL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.PreDL by SPP
## W = 11357, p-value = 0.7536
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.07207504 0.10498813
## sample estimates:
## difference in location
## 0.01369883
wilcox.test(abs.res.DbL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.DbL by SPP
## W = 9762, p-value = 0.06875
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.197721487 0.006187957
## sample estimates:
## difference in location
## -0.09311572
wilcox.test(abs.res.HL ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.HL by SPP
## W = 13199, p-value = 0.005437
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.03552552 0.21746054
## sample estimates:
## difference in location
## 0.1238616
wilcox.test(abs.res.HD ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.HD by SPP
## W = 11348, p-value = 0.7627
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.04937667 0.06483465
## sample estimates:
## difference in location
## 0.00843405
wilcox.test(abs.res.HW ~ SPP, data = raw3, conf.int = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: abs.res.HW by SPP
## W = 11537, p-value = 0.579
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.03709909 0.06994149
## sample estimates:
## difference in location
## 0.01527595
# NOTE(review): raw3 also carries abs.res.SnL and abs.res.OL, but neither is
# tested in this section -- TODO confirm whether that omission is intentional.
Will run an ANOVA on the residuals with location and species as fixed effects. This will show me if morphology depends on the species, the location, and if the location and species interact to determine morphology.
I will first run this using the zones as the location factor. Zones (1-4) represent the latitude range with equivalent sample sizes in each, since the collections were not equally representative of all latitudes, and I wanted to avoid a sampling bias when randomly selecting samples. Zone 1 corresponds to the southernmost latitude range, and zone 4 corresponds to the northernmost latitude range.
I will then run the same analysis using basin as the location factor. Since fish are physically isolated to the river basins they occupy, the genetic variation is also limited to that basin. Thus it is possible for fish within the same basin to be more similar due to genetic/physical constraints. (will also do with watershed just to see).
Lastly, I will run ANOVAs with both zones and basins but with standardized residuals. This would allow me to compare overall variation across traits (at least those that are dependent on body size) rather than just one trait at a time. I am not 100% sure if this is useful (or correct to do), but thought it would be interesting.
library(ggplot2)
# Two-way ANOVAs of the absolute residuals, one trait at a time, on raw3:
# species (SPP), latitude zone (QUARTILE) and their interaction.
# In the recorded output a box plot follows only those models with at least
# one term at p < 0.05 (none is drawn for P1, LLSC, HD or SnL).
# NOTE(review): aov() reports sequential sums of squares, so with unequal
# group sizes the SPP and QUARTILE lines depend on the order of the terms --
# confirm that SPP-first is the intended order.
A.D <- aov(abs.res.D ~ SPP*QUARTILE, data=raw3)
summary(A.D)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.06 0.0635 0.408 0.52344
## QUARTILE 3 0.45 0.1510 0.970 0.40715
## SPP:QUARTILE 3 3.22 1.0719 6.888 0.00017 ***
## Residuals 292 45.44 0.1556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.D, fill=SPP)) +
geom_boxplot()
A.P1 <- aov(abs.res.P1 ~ SPP*QUARTILE, data=raw3)
summary(A.P1)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.29 0.2878 1.554 0.214
## QUARTILE 3 1.10 0.3663 1.977 0.117
## SPP:QUARTILE 3 0.73 0.2427 1.310 0.271
## Residuals 292 54.10 0.1853
A.P1.R <- aov(abs.res.P1.R ~ SPP*QUARTILE, data=raw3)
summary(A.P1.R)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.80 1.7983 11.423 0.000824 ***
## QUARTILE 3 0.13 0.0434 0.276 0.842918
## SPP:QUARTILE 3 0.06 0.0189 0.120 0.948162
## Residuals 292 45.97 0.1574
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.P1.R, fill=SPP)) +
geom_boxplot()
A.LLSC <- aov(abs.res.LLSC ~ SPP*QUARTILE, data=raw3)
summary(A.LLSC)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.40 1.4013 2.940 0.0875 .
## QUARTILE 3 1.35 0.4506 0.945 0.4190
## SPP:QUARTILE 3 3.42 1.1399 2.391 0.0688 .
## Residuals 292 139.19 0.4767
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A.SALL <- aov(abs.res.SALL ~ SPP*QUARTILE, data=raw3)
summary(A.SALL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.14 0.1352 1.151 0.284
## QUARTILE 3 3.23 1.0782 9.182 7.99e-06 ***
## SPP:QUARTILE 3 0.25 0.0845 0.720 0.541
## Residuals 292 34.29 0.1174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.SALL, fill=SPP)) +
geom_boxplot()
A.SBLL <- aov(abs.res.SBLL ~ SPP*QUARTILE, data=raw3)
summary(A.SBLL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.65 0.6523 4.859 0.0283 *
## QUARTILE 3 0.78 0.2591 1.930 0.1248
## SPP:QUARTILE 3 0.12 0.0390 0.291 0.8320
## Residuals 292 39.20 0.1342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.SBLL, fill=SPP)) +
geom_boxplot()
A.BD <- aov(abs.res.BD ~ SPP*QUARTILE, data=raw3)
summary(A.BD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.33 0.3327 1.006 0.3166
## QUARTILE 3 1.11 0.3694 1.117 0.3422
## SPP:QUARTILE 3 3.11 1.0364 3.136 0.0259 *
## Residuals 292 96.51 0.3305
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.BD, fill=SPP)) +
geom_boxplot()
A.CPD <- aov(abs.res.CPD ~ SPP*QUARTILE, data=raw3)
summary(A.CPD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.151 0.1507 1.606 0.2061
## QUARTILE 3 1.020 0.3399 3.622 0.0136 *
## SPP:QUARTILE 3 0.093 0.0311 0.332 0.8025
## Residuals 292 27.405 0.0939
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPD, fill=SPP)) +
geom_boxplot()
A.CPL <- aov(abs.res.CPL ~ SPP*QUARTILE, data=raw3)
summary(A.CPL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.03 0.0320 0.217 0.6418
## QUARTILE 3 1.51 0.5018 3.402 0.0182 *
## SPP:QUARTILE 3 0.13 0.0442 0.300 0.8257
## Residuals 292 43.07 0.1475
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPL, fill=SPP)) +
geom_boxplot()
A.PreDL <- aov(abs.res.PreDL ~ SPP*QUARTILE, data=raw3)
summary(A.PreDL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.05 0.0468 0.235 0.6282
## QUARTILE 3 0.41 0.1357 0.681 0.5640
## SPP:QUARTILE 3 2.22 0.7416 3.724 0.0118 *
## Residuals 292 58.15 0.1991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.PreDL, fill=SPP)) +
geom_boxplot()
A.DbL <- aov(abs.res.DbL ~ SPP*QUARTILE, data=raw3)
summary(A.DbL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.53 1.5287 5.617 0.0184 *
## QUARTILE 3 0.85 0.2838 1.043 0.3740
## SPP:QUARTILE 3 0.51 0.1698 0.624 0.6002
## Residuals 292 79.48 0.2722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.DbL, fill=SPP)) +
geom_boxplot()
A.HL <- aov(abs.res.HL ~ SPP*QUARTILE, data=raw3)
summary(A.HL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 2.91 2.908 6.779 0.00969 **
## QUARTILE 3 3.96 1.320 3.077 0.02795 *
## SPP:QUARTILE 3 6.08 2.028 4.727 0.00309 **
## Residuals 292 125.28 0.429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HL, fill=SPP)) +
geom_boxplot()
A.HD <- aov(abs.res.HD ~ SPP*QUARTILE, data=raw3)
summary(A.HD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.01 0.01253 0.114 0.736
## QUARTILE 3 0.11 0.03551 0.323 0.809
## SPP:QUARTILE 3 0.27 0.09140 0.831 0.478
## Residuals 292 32.12 0.11001
A.HW <- aov(abs.res.HW ~ SPP*QUARTILE, data=raw3)
summary(A.HW)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.037 0.0373 0.414 0.520577
## QUARTILE 3 1.713 0.5710 6.336 0.000356 ***
## SPP:QUARTILE 3 0.562 0.1875 2.081 0.102864
## Residuals 292 26.311 0.0901
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HW, fill=SPP)) +
geom_boxplot()
A.SnL <- aov(abs.res.SnL ~ SPP*QUARTILE, data=raw3)
summary(A.SnL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.41 0.4066 1.144 0.286
## QUARTILE 3 0.93 0.3090 0.869 0.457
## SPP:QUARTILE 3 1.19 0.3977 1.119 0.342
## Residuals 292 103.79 0.3554
A.OL <- aov(abs.res.OL ~ SPP*QUARTILE, data=raw3)
summary(A.OL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.125 0.12542 3.927 0.0485 *
## QUARTILE 3 0.167 0.05572 1.745 0.1579
## SPP:QUARTILE 3 0.187 0.06248 1.956 0.1207
## Residuals 292 9.326 0.03194
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.OL, fill=SPP)) +
geom_boxplot()
# Two-way ANOVAs of the absolute residuals, one trait at a time, on raw3:
# species (SPP), river basin (BASIN) and their interaction.
# In the recorded output a box plot follows only those models with at least
# one term at p < 0.05 (none is drawn for P1, CPL or SnL).
# The output shows BASIN with 6 Df but SPP:BASIN with only 3 Df, i.e. not
# every basin contains both species, so the interaction is estimated from a
# subset of basins.
# NOTE(review): aov() reports sequential sums of squares, so with unequal
# group sizes the SPP and BASIN lines depend on the order of the terms --
# confirm that SPP-first is the intended order.
A1.D <- aov(abs.res.D ~ SPP*BASIN, data=raw3)
summary(A1.D)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.06 0.0635 0.402 0.52666
## BASIN 6 3.12 0.5205 3.293 0.00377 **
## SPP:BASIN 3 0.31 0.1028 0.650 0.58344
## Residuals 289 45.68 0.1581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.D, fill=SPP)) +
geom_boxplot()
A1.P1 <- aov(abs.res.P1 ~ SPP*BASIN, data=raw3)
summary(A1.P1)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.29 0.2878 1.528 0.217
## BASIN 6 0.97 0.1613 0.856 0.528
## SPP:BASIN 3 0.53 0.1751 0.930 0.427
## Residuals 289 54.43 0.1883
A1.P1.R <- aov(abs.res.P1.R ~ SPP*BASIN, data=raw3)
summary(A1.P1.R)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.80 1.7983 11.421 0.000826 ***
## BASIN 6 0.51 0.0845 0.537 0.780341
## SPP:BASIN 3 0.15 0.0485 0.308 0.819718
## Residuals 289 45.50 0.1575
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.P1.R, fill=SPP)) +
geom_boxplot()
A1.LLSC <- aov(abs.res.LLSC ~ SPP*BASIN, data=raw3)
summary(A1.LLSC)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.40 1.4013 2.999 0.0844 .
## BASIN 6 6.83 1.1376 2.435 0.0260 *
## SPP:BASIN 3 2.11 0.7026 1.504 0.2137
## Residuals 289 135.02 0.4672
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.LLSC, fill=SPP)) +
geom_boxplot()
A1.SALL <- aov(abs.res.SALL ~ SPP*BASIN, data=raw3)
summary(A1.SALL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.14 0.1352 1.156 0.283
## BASIN 6 3.54 0.5905 5.051 6.03e-05 ***
## SPP:BASIN 3 0.45 0.1496 1.280 0.281
## Residuals 289 33.79 0.1169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.SALL, fill=SPP)) +
geom_boxplot()
A1.SBLL <- aov(abs.res.SBLL ~ SPP*BASIN, data=raw3)
summary(A1.SBLL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.65 0.6523 4.895 0.0277 *
## BASIN 6 1.07 0.1785 1.340 0.2393
## SPP:BASIN 3 0.51 0.1716 1.288 0.2788
## Residuals 289 38.51 0.1332
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.SBLL, fill=SPP)) +
geom_boxplot()
A1.BD <- aov(abs.res.BD ~ SPP*BASIN, data=raw3)
summary(A1.BD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.33 0.3327 1.050 0.30643
## BASIN 6 6.94 1.1571 3.651 0.00165 **
## SPP:BASIN 3 2.20 0.7349 2.319 0.07561 .
## Residuals 289 91.58 0.3169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.BD, fill=SPP)) +
geom_boxplot()
A1.CPD <- aov(abs.res.CPD ~ SPP*BASIN, data=raw3)
summary(A1.CPD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.151 0.15069 1.626 0.20327
## BASIN 6 1.601 0.26682 2.879 0.00968 **
## SPP:BASIN 3 0.135 0.04508 0.486 0.69198
## Residuals 289 26.782 0.09267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.CPD, fill=SPP)) +
geom_boxplot()
A1.CPL <- aov(abs.res.CPL ~ SPP*BASIN, data=raw3)
summary(A1.CPL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.03 0.03199 0.213 0.645
## BASIN 6 1.00 0.16720 1.114 0.354
## SPP:BASIN 3 0.34 0.11217 0.748 0.525
## Residuals 289 43.36 0.15005
A1.PreDL <- aov(abs.res.PreDL ~ SPP*BASIN, data=raw3)
summary(A1.PreDL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.05 0.0468 0.237 0.6270
## BASIN 6 2.04 0.3406 1.722 0.1155
## SPP:BASIN 3 1.58 0.5261 2.660 0.0484 *
## Residuals 289 57.16 0.1978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.PreDL, fill=SPP)) +
geom_boxplot()
A1.DbL <- aov(abs.res.DbL ~ SPP*BASIN, data=raw3)
summary(A1.DbL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.53 1.5287 5.614 0.0185 *
## BASIN 6 1.69 0.2823 1.037 0.4015
## SPP:BASIN 3 0.44 0.1482 0.544 0.6524
## Residuals 289 78.70 0.2723
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.DbL, fill=SPP)) +
geom_boxplot()
A1.HL <- aov(abs.res.HL ~ SPP*BASIN, data=raw3)
summary(A1.HL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 2.91 2.9084 6.563 0.0109 *
## BASIN 6 3.83 0.6377 1.439 0.1995
## SPP:BASIN 3 3.42 1.1414 2.576 0.0541 .
## Residuals 289 128.07 0.4431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HL, fill=SPP)) +
geom_boxplot()
A1.HD <- aov(abs.res.HD ~ SPP*BASIN, data=raw3)
summary(A1.HD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.013 0.0125 0.118 0.7311
## BASIN 6 0.845 0.1408 1.330 0.2437
## SPP:BASIN 3 1.054 0.3514 3.318 0.0203 *
## Residuals 289 30.605 0.1059
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HD, fill=SPP)) +
geom_boxplot()
A1.HW <- aov(abs.res.HW ~ SPP*BASIN, data=raw3)
summary(A1.HW)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.037 0.03728 0.403 0.52622
## BASIN 6 1.676 0.27941 3.018 0.00708 **
## SPP:BASIN 3 0.151 0.05048 0.545 0.65172
## Residuals 289 26.759 0.09259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HW, fill=SPP)) +
geom_boxplot()
A1.SnL <- aov(abs.res.SnL ~ SPP*BASIN, data=raw3)
summary(A1.SnL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.41 0.4066 1.139 0.287
## BASIN 6 1.63 0.2711 0.759 0.602
## SPP:BASIN 3 1.10 0.3677 1.030 0.380
## Residuals 289 103.18 0.3570
A1.OL <- aov(abs.res.OL ~ SPP*BASIN, data=raw3)
summary(A1.OL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.125 0.12542 3.995 0.0466 *
## BASIN 6 0.315 0.05257 1.674 0.1271
## SPP:BASIN 3 0.292 0.09728 3.099 0.0272 *
## Residuals 289 9.073 0.03139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.OL, fill=SPP)) +
geom_boxplot()
# Two-way ANOVAs of the absolute residuals, one trait at a time, on raw3:
# species (SPP), watershed (WATERSHED) and their interaction.
# In the recorded output a box plot follows only those models with at least
# one term at p < 0.05 (none is drawn for P1, CPL, PreDL or SnL).
# The output shows WATERSHED with 13 Df but SPP:WATERSHED with only 5 Df,
# i.e. most watersheds hold a single species, so the interaction is estimated
# from a subset of watersheds.
# NOTE(review): aov() reports sequential sums of squares, so with unequal
# group sizes the SPP and WATERSHED lines depend on the order of the terms --
# confirm that SPP-first is the intended order.
A2.D <- aov(abs.res.D ~ SPP*WATERSHED, data=raw3)
summary(A2.D)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.06 0.0635 0.415 0.5198
## WATERSHED 13 4.33 0.3331 2.178 0.0106 *
## SPP:WATERSHED 5 1.96 0.3922 2.564 0.0274 *
## Residuals 280 42.82 0.1529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.D, fill=SPP)) +
geom_boxplot()
A2.P1 <- aov(abs.res.P1 ~ SPP*WATERSHED, data=raw3)
summary(A2.P1)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.29 0.2878 1.568 0.2116
## WATERSHED 13 3.73 0.2868 1.562 0.0957 .
## SPP:WATERSHED 5 0.78 0.1569 0.855 0.5120
## Residuals 280 51.41 0.1836
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.P1.R <- aov(abs.res.P1.R ~ SPP*WATERSHED, data=raw3)
summary(A2.P1.R)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.80 1.7983 11.145 0.000957 ***
## WATERSHED 13 0.82 0.0629 0.390 0.972416
## SPP:WATERSHED 5 0.16 0.0322 0.199 0.962521
## Residuals 280 45.18 0.1614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.P1.R, fill=SPP)) +
geom_boxplot()
A2.LLSC <- aov(abs.res.LLSC ~ SPP*WATERSHED, data=raw3)
summary(A2.LLSC)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.40 1.4013 3.299 0.0704 .
## WATERSHED 13 20.28 1.5596 3.672 2.15e-05 ***
## SPP:WATERSHED 5 4.75 0.9502 2.237 0.0509 .
## Residuals 280 118.93 0.4248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.LLSC, fill=SPP)) +
geom_boxplot()
A2.SALL <- aov(abs.res.SALL ~ SPP*WATERSHED, data=raw3)
summary(A2.SALL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.135 0.1352 1.233 0.26772
## WATERSHED 13 5.314 0.4088 3.730 1.67e-05 ***
## SPP:WATERSHED 5 1.779 0.3557 3.246 0.00725 **
## Residuals 280 30.685 0.1096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.SALL, fill=SPP)) +
geom_boxplot()
A2.SBLL <- aov(abs.res.SBLL ~ SPP*WATERSHED, data=raw3)
summary(A2.SBLL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.65 0.6523 5.611 0.0185 *
## WATERSHED 13 2.98 0.2289 1.969 0.0233 *
## SPP:WATERSHED 5 4.57 0.9138 7.861 6.06e-07 ***
## Residuals 280 32.55 0.1162
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.SBLL, fill=SPP)) +
geom_boxplot()
A2.BD <- aov(abs.res.BD ~ SPP*WATERSHED, data=raw3)
summary(A2.BD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.33 0.3327 1.076 0.30056
## WATERSHED 13 10.34 0.7951 2.571 0.00225 **
## SPP:WATERSHED 5 3.80 0.7607 2.460 0.03344 *
## Residuals 280 86.59 0.3093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.BD, fill=SPP)) +
geom_boxplot()
A2.CPD <- aov(abs.res.CPD ~ SPP*WATERSHED, data=raw3)
summary(A2.CPD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.151 0.15069 1.809 0.17966
## WATERSHED 13 3.765 0.28965 3.478 4.98e-05 ***
## SPP:WATERSHED 5 1.435 0.28691 3.445 0.00488 **
## Residuals 280 23.318 0.08328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.CPD, fill=SPP)) +
geom_boxplot()
A2.CPL <- aov(abs.res.CPL ~ SPP*WATERSHED, data=raw3)
summary(A2.CPL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.03 0.03199 0.218 0.641
## WATERSHED 13 2.52 0.19397 1.319 0.201
## SPP:WATERSHED 5 1.01 0.20254 1.377 0.233
## Residuals 280 41.17 0.14703
A2.PreDL <- aov(abs.res.PreDL ~ SPP*WATERSHED, data=raw3)
summary(A2.PreDL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.05 0.0468 0.236 0.6272
## WATERSHED 13 4.34 0.3336 1.685 0.0636 .
## SPP:WATERSHED 5 1.01 0.2019 1.020 0.4060
## Residuals 280 55.43 0.1980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.DbL <- aov(abs.res.DbL ~ SPP*WATERSHED, data=raw3)
summary(A2.DbL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 1.53 1.5287 6.247 0.013 *
## WATERSHED 13 4.76 0.3665 1.498 0.117
## SPP:WATERSHED 5 7.55 1.5107 6.173 1.9e-05 ***
## Residuals 280 68.52 0.2447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.DbL, fill=SPP)) +
geom_boxplot()
A2.HL <- aov(abs.res.HL ~ SPP*WATERSHED, data=raw3)
summary(A2.HL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 2.91 2.9084 6.844 0.00938 **
## WATERSHED 13 10.21 0.7856 1.849 0.03602 *
## SPP:WATERSHED 5 6.11 1.2227 2.877 0.01497 *
## Residuals 280 118.99 0.4250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HL, fill=SPP)) +
geom_boxplot()
A2.HD <- aov(abs.res.HD ~ SPP*WATERSHED, data=raw3)
summary(A2.HD)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.013 0.0125 0.125 0.724090
## WATERSHED 13 2.141 0.1647 1.641 0.073640 .
## SPP:WATERSHED 5 2.270 0.4541 4.526 0.000553 ***
## Residuals 280 28.093 0.1003
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HD, fill=SPP)) +
geom_boxplot()
A2.HW <- aov(abs.res.HW ~ SPP*WATERSHED, data=raw3)
summary(A2.HW)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.037 0.03728 0.408 0.52356
## WATERSHED 13 2.615 0.20116 2.201 0.00974 **
## SPP:WATERSHED 5 0.380 0.07593 0.831 0.52868
## Residuals 280 25.592 0.09140
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HW, fill=SPP)) +
geom_boxplot()
A2.SnL <- aov(abs.res.SnL ~ SPP*WATERSHED, data=raw3)
summary(A2.SnL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.41 0.4066 1.132 0.288
## WATERSHED 13 2.76 0.2124 0.592 0.860
## SPP:WATERSHED 5 2.61 0.5222 1.454 0.205
## Residuals 280 100.53 0.3591
A2.OL <- aov(abs.res.OL ~ SPP*WATERSHED, data=raw3)
summary(A2.OL)
## Df Sum Sq Mean Sq F value Pr(>F)
## SPP 1 0.125 0.12542 4.005 0.0463 *
## WATERSHED 13 0.670 0.05153 1.646 0.0726 .
## SPP:WATERSHED 5 0.243 0.04859 1.552 0.1740
## Residuals 280 8.767 0.03131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.OL, fill=SPP)) +
geom_boxplot()
The ANOVAs above focus on differences of particular traits as a factor of species and location. If we want to get an idea of variation in general as a factor of species and location, we can standardize the residuals (essentially unitless z-scores of residuals).
# Absolute standardized residuals, one vector per trait, built by
# concatenating the "lat" values followed by the "form" values.
# NOTE(review): the sd.lat.* / sd.form.* vectors are created earlier in the
# file; presumably they are the standardized residuals of the p.latipinna and
# p.formosa regressions -- confirm.
sd.res.D <- c(abs(sd.lat.D), abs(sd.form.D))
sd.res.P1 <- c(abs(sd.lat.P1), abs(sd.form.P1))
sd.res.P1.R <- c(abs(sd.lat.P1.R), abs(sd.form.P1.R))
sd.res.LLSC <- c(abs(sd.lat.LLSC), abs(sd.form.LLSC))
sd.res.SALL <- c(abs(sd.lat.SALL), abs(sd.form.SALL))
sd.res.SBLL <- c(abs(sd.lat.SBLL), abs(sd.form.SBLL))
sd.res.BD <- c(abs(sd.lat.BD), abs(sd.form.BD))
sd.res.CPD <- c(abs(sd.lat.CPD), abs(sd.form.CPD))
sd.res.CPL <- c(abs(sd.lat.CPL), abs(sd.form.CPL))
sd.res.PreDL <- c(abs(sd.lat.PreDL), abs(sd.form.PreDL))
sd.res.DbL <- c(abs(sd.lat.DbL), abs(sd.form.DbL))
sd.res.HL <- c(abs(sd.lat.HL), abs(sd.form.HL))
sd.res.HD <- c(abs(sd.lat.HD), abs(sd.form.HD))
sd.res.HW <- c(abs(sd.lat.HW), abs(sd.form.HW))
sd.res.SnL <- c(abs(sd.lat.SnL), abs(sd.form.SnL))
sd.res.OL <- c(abs(sd.lat.OL), abs(sd.form.OL))

# The vectors are attached to raw3 by position, which is only correct when
# raw3 lists every p.latipinna row before the p.formosa rows. Fail loudly on a
# row-count or ordering mismatch instead of silently misaligning values.
stopifnot(
  length(sd.res.D) == nrow(raw3),
  # An SPP label other than these two yields NA from match() and also fails.
  !is.unsorted(match(raw3$SPP, c("p.latipinna", "p.formosa")))
)

# cbind() names each new column after the variable passed in (sd.res.D, ...).
raw4 <- cbind(
  raw3,
  sd.res.D, sd.res.P1, sd.res.P1.R, sd.res.LLSC,
  sd.res.SALL, sd.res.SBLL, sd.res.BD, sd.res.CPD,
  sd.res.CPL, sd.res.PreDL, sd.res.DbL, sd.res.HL,
  sd.res.HD, sd.res.HW, sd.res.SnL, sd.res.OL
)

# Long format: stack the 16 standardized-residual columns into `values`
# (with the source column name in `ind`). The columns are selected by name
# rather than by position (previously 53:68), so adding or removing a column
# upstream cannot silently stack the wrong variables.
sd_cols <- paste0(
  "sd.res.",
  c(
    "D", "P1", "P1.R", "LLSC", "SALL", "SBLL", "BD", "CPD",
    "CPL", "PreDL", "DbL", "HL", "HD", "HW", "SnL", "OL"
  )
)
# The first 14 columns of raw4 are recycled once per stacked trait, which is
# why cbind() warns that row names were discarded.
# NOTE(review): columns 1:14 presumably hold the specimen metadata, including
# SPP, QUARTILE, BASIN and WATERSHED used below -- confirm.
raw5 <- cbind(raw4[1:14], stack(raw4[sd_cols]))
## Warning in data.frame(..., check.names = FALSE): row names were found from a
## short variable and have been discarded
# Split the stacked data (raw5) by species for the within-species models.
lat.raw5 <- raw5[raw5$SPP == "p.latipinna",]
form.raw5 <- raw5[raw5$SPP == "p.formosa",]
######ZONES#####
# One-way ANOVAs of the stacked values on latitude zone (QUARTILE), run
# separately for each species.
# NOTE(review): raw5 holds one row per fish per stacked trait, so the residual
# Df (2140, 2652, 4792) count trait values rather than fish, and rows from the
# same fish are treated as independent -- confirm this is acceptable.
A3.lat <- aov(values~QUARTILE, data=lat.raw5)
summary(A3.lat)
## Df Sum Sq Mean Sq F value Pr(>F)
## QUARTILE 3 9.8 3.274 7.1 9.57e-05 ***
## Residuals 2140 986.6 0.461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A3.form <- aov(values~QUARTILE, data=form.raw5)
summary(A3.form)
## Df Sum Sq Mean Sq F value Pr(>F)
## QUARTILE 3 2.5 0.8491 1.992 0.113
## Residuals 2652 1130.4 0.4263
#between species
# Both species together: zone, species and their interaction. The terms are
# entered zone-first here, whereas the per-trait models enter SPP first.
A3 <- aov(values~QUARTILE*SPP, data=raw5)
summary(A3)
## Df Sum Sq Mean Sq F value Pr(>F)
## QUARTILE 3 8.1 2.7123 6.139 0.000366 ***
## SPP 1 0.3 0.2740 0.620 0.430974
## QUARTILE:SPP 3 4.8 1.5926 3.605 0.012837 *
## Residuals 4792 2117.1 0.4418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw5, aes(x=factor(QUARTILE), y=values, fill=SPP)) +
geom_boxplot()
# Basins (BASIN) ----

# One-way ANOVA of the pooled values by BASIN, p.latipinna rows only.
A4.lat <- aov(formula = values ~ BASIN, data = lat.raw5)
summary(A4.lat)
## Df Sum Sq Mean Sq F value Pr(>F)
## BASIN 5 12.9 2.579 5.605 3.87e-05 ***
## Residuals 2138 983.6 0.460
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Same model, p.formosa rows only.
A4.form <- aov(formula = values ~ BASIN, data = form.raw5)
summary(A4.form)
## Df Sum Sq Mean Sq F value Pr(>F)
## BASIN 4 15.6 3.912 9.281 1.91e-07 ***
## Residuals 2651 1117.3 0.421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Between species: BASIN * SPP fits both main effects plus their interaction
# on the full table.
A4 <- aov(formula = values ~ BASIN * SPP, data = raw5)
summary(A4)
## Df Sum Sq Mean Sq F value Pr(>F)
## BASIN 6 27.9 4.657 10.616 9.59e-12 ***
## SPP 1 0.1 0.058 0.133 0.715
## BASIN:SPP 3 1.4 0.453 1.034 0.376
## Residuals 4789 2100.9 0.439
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Boxplot of the pooled values per basin, one box per species.
ggplot(
  data = raw5,
  mapping = aes(x = factor(BASIN), y = values, fill = SPP)
) +
  geom_boxplot()
# Watersheds (WATERSHED) ----

# One-way ANOVA of the pooled values by WATERSHED, p.latipinna rows only.
A5.lat <- aov(formula = values ~ WATERSHED, data = lat.raw5)
summary(A5.lat)
## Df Sum Sq Mean Sq F value Pr(>F)
## WATERSHED 11 26.6 2.4194 5.319 2.33e-08 ***
## Residuals 2132 969.8 0.4549
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Same model, p.formosa rows only.
A5.form <- aov(formula = values ~ WATERSHED, data = form.raw5)
summary(A5.form)
## Df Sum Sq Mean Sq F value Pr(>F)
## WATERSHED 7 25.7 3.669 8.775 1.06e-10 ***
## Residuals 2648 1107.3 0.418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Between species: WATERSHED * SPP fits both main effects plus their
# interaction on the full table.
A5 <- aov(formula = values ~ WATERSHED * SPP, data = raw5)
summary(A5)
## Df Sum Sq Mean Sq F value Pr(>F)
## WATERSHED 13 40.6 3.1194 7.179 4.58e-14 ***
## SPP 1 0.3 0.2814 0.648 0.421
## WATERSHED:SPP 5 12.3 2.4570 5.654 3.33e-05 ***
## Residuals 4780 2077.1 0.4345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Boxplot of the pooled values per watershed, one box per species.
ggplot(
  data = raw5,
  mapping = aes(x = factor(WATERSHED), y = values, fill = SPP)
) +
  geom_boxplot()
LOGAN: CHECK THAT EACH VARIABLE IS NEARLY NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THOSE; IF NOT, CONVERT TO Z SCORES AND THEN RUN PCA.
In this analysis, I will compare the principal components after centering and scaling the data. A PCA will help us determine which aspects of morphology contribute most to the variation in our data without worrying about differences in scales/measurements. Currently, the data consist of 116 Sailfin and 186 Amazon.
Variable chart:
D: dorsal ray count
P1: left pectoral ray count
P2.L: left pelvic rays
P2.R: right pelvic rays
A: anal rays
P1.R: right pectoral ray count
LLSC: lateral line scale count
SALL: scales above lateral line
SBLL: scales below lateral line
SBDF: scales before dorsal fin
TL: total length
SL: standard length
BD: body depth
CPD: caudal peduncle depth
CPL: caudal peduncle length
PreDL: pre-dorsal length
DbL: dorsal base length
HL/HW/HD: head length/width/depth
SnL: snout length
OL: ocular length
summary of PCs
{r, echo=FALSE}
# PCA on columns 10:31 of raw1 (all 22 traits). center = TRUE and
# scale. = TRUE make prcomp() work on z-scores of each trait, so traits
# measured on different scales contribute comparably.
PCA <- prcomp(raw1[, 10:31], center = TRUE, scale. = TRUE)
summary(PCA)

# Loadings (rotation matrix) of the first five components.
loadings <- PCA$rotation
loadings[, 1:5]

# Varimax rotation of the full loading matrix.
VM_PCA <- varimax(PCA$rotation)
summary(VM_PCA)
{r, echo=FALSE}
# One library() call per line; two calls on the same line do not parse.
library(AMR)
library(ggplot2)
library(ggfortify)
# Plot eigenvalues and compare them with the broken-stick model
# (MacArthur 1957).
#
# Draws two stacked bar plots on the current graphics device:
#   1. the eigenvalues, with a red line at their mean;
#   2. the percentage of variation of each axis next to the percentage
#      expected under the broken-stick model.
#
# Args:
#   ev: numeric vector of eigenvalues (at least one), e.g. PCA$sdev^2.
#
# Returns (invisibly) the value of the final par() call, as before. The
# caller's par(mfrow) setting is restored on exit, even if plotting fails.
evplot <- function(ev) {
  stopifnot(is.numeric(ev), length(ev) > 0)
  n <- length(ev)

  # Broken-stick percentages: running sums of 1/n, 1/(n-1), ..., 1/1, divided
  # by n. Vectorised so that a single eigenvalue (n == 1) also works; the
  # former `for (i in 2:n)` loop counted backwards in that case and failed.
  bsm <- data.frame(
    j = seq_len(n),
    p = 100 * cumsum(1 / rev(seq_len(n))) / n
  )

  # Two panels, one above the other.
  op <- par(mfrow = c(2, 1))
  on.exit(par(op), add = TRUE)

  # Panel 1: eigenvalues with their average.
  barplot(ev, main = "Eigenvalues", col = "bisque", las = 2)
  abline(h = mean(ev), col = "red")
  legend("topright", "Average eigenvalue", lwd = 1, col = 2, bty = "n")

  # Panel 2: observed % of variation next to the broken-stick expectation
  # (largest expected share first, hence rev()).
  barplot(
    t(cbind(100 * ev / sum(ev), rev(bsm$p))),
    beside = TRUE, main = "% variation", col = c("bisque", 2), las = 2
  )
  legend(
    "topright", c("% eigenvalue", "Broken stick model"),
    pch = 15, col = c("bisque", 2), bty = "n"
  )

  # Restore the layout on the normal path; on.exit() covers the error path.
  par(op)
}
# Eigenvalues of the PCA (squared standard deviations of the components).
ev <- PCA$sdev^2
evplot(ev)
# According to the Kaiser-Guttman criterion, we can use the first 4 PCs, even
# though the broken stick model shows only the first above the red bar plot...
# not 100% confident I know what this means, but pretty sure PC1 is body size.

# PC1 vs PC2 (autoplot defaults), coloured by species, with loading arrows.
plot6 <- autoplot(
  PCA, data = raw1, colour = "SPP",
  loadings = TRUE, loadings.colour = "navyblue",
  loadings.label = TRUE, loadings.label.colour = "navyblue",
  loadings.label.size = 5,
  loadings.label.vjust = 1, loadings.label.hjust = 1.2,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot6

# Same plot without the loading arrows.
plot6.1 <- autoplot(
  PCA, data = raw1, colour = "SPP",
  loadings = FALSE, loadings.label = FALSE,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot6.1

# PC1 vs PC2 coloured by location grouping, point shape by species.
plot7 <- autoplot(
  PCA, data = raw1, colour = "QUARTILE", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot7

plot7A <- autoplot(
  PCA, data = raw1, colour = "BASIN", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot7A

plot7B <- autoplot(
  PCA, data = raw1, colour = "WATERSHED", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot7B

# PC2 vs PC3, coloured by species, with loading arrows.
plot8 <- autoplot(
  PCA, x = 2, y = 3, data = raw1, colour = "SPP",
  loadings = TRUE, loadings.colour = "navyblue",
  loadings.label = TRUE, loadings.label.colour = "navyblue",
  loadings.label.size = 5,
  loadings.label.vjust = 1, loadings.label.hjust = 1.2,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot8

# Same plot without the loading arrows.
plot8.1 <- autoplot(
  PCA, x = 2, y = 3, data = raw1, colour = "SPP",
  loadings = FALSE, loadings.label = FALSE,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot8.1

# PC2 vs PC3 coloured by location grouping, point shape by species.
plot9 <- autoplot(
  PCA, x = 2, y = 3, data = raw1, colour = "QUARTILE", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot9

plot9A <- autoplot(
  PCA, x = 2, y = 3, data = raw1, colour = "BASIN", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot9A

plot9B <- autoplot(
  PCA, x = 2, y = 3, data = raw1, colour = "WATERSHED", shape = "SPP",
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_minimal()
plot9B
# Second PCA on columns 33:48 of raw2, centered and scaled. That range is 16
# columns, not the 22 traits the earlier comment claimed.
# NOTE(review): these are presumably the 16 trait residuals (see the title of
# plot10) -- confirm against the column layout of raw2.
PCA2 <- prcomp(raw2[, 33:48], center = TRUE, scale. = TRUE)
summary(PCA2)

# Loadings (rotation matrix) of the first five components.
loadings1 <- PCA2$rotation
loadings1[, 1:5]

# PC1 vs PC2 of the residual PCA, coloured by species, with loading arrows.
plot10 <- autoplot(
  PCA2, x = 1, y = 2, data = raw2, colour = "SPP",
  loadings = TRUE, loadings.colour = "navyblue",
  loadings.label = TRUE, loadings.label.colour = "navyblue",
  loadings.label.size = 5,
  loadings.label.vjust = 1, loadings.label.hjust = 1.2,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology trait Residuals") +
  theme_minimal()
plot10

# Same plot without the loading arrows. The title now matches plot10: it was
# copied from the raw-trait plots although this figure shows PCA2.
plot11 <- autoplot(
  PCA2, x = 1, y = 2, data = raw2, colour = "SPP",
  loadings = FALSE, loadings.label = FALSE,
  frame = TRUE, frame.type = "norm"
) +
  ggtitle("PCA Plot of Morphology trait Residuals") +
  theme_minimal()
plot11
Variable chart: